library(RODBC) library(knitr) library(tidyverse) knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE, error= TRUE) startTime <- Sys.time()
connectionString <- getConnectionString(params)
run_query <- function(query_text, ...){ result <- run_db_query(connectionString, query_text ) return(result) } get_connection <- function(){ connection <- get_new_connection(connectionString) return(connection) }
tbresult <-runTableReplacements(connectionString)
QA_Alert_Messages <- data.frame('Alert Code'=character(), 'Alert Message'=character(), stringsAsFactors = FALSE)
The purpose of the data quality program is to conduct data quality checks that correspond to similar data checks carried out by PCORnet (Version 7). The checks cover 4 realms of data quality: data model conformance, data plausibility, data completeness, and data persistence. The program uses a series of SQL queries operationalized using RStudio to produce this report. The program is designed to support tables/figures that provide detailed information about each data quality check as well as an alert if a data error appears to be present (based on criteria developed by PCORnet or the CHORDS team).
This data quality report was generated from CHORDS r params$DBName
.
Data Partner:
Analyst:
Query Run Date: r Sys.Date()
# Table to check for quality tableCheckList <- c(lab_results, encounters, prescribing,procedures , social_history , provider_specialty,diagnoses ,vital_signs , census_location, demographics , pro_surveys , pro_questions ,pro_responses) # table list in DB sqlConnection <- get_connection() dbTables <- sqlTables(get_connection()) RODBC::odbcClose(sqlConnection) tabledChecked <- vector(length=length(tableCheckList)) for (i in 1:length(tableCheckList)){ tabledChecked[i] <- sum(grepl(tableCheckList[i], dbTables$TABLE_NAME, ignore.case = T))>0 } missingTables <- setdiff(tableCheckList, tableCheckList[tabledChecked]) nonMissingTables <- setdiff(tableCheckList, missingTables) nonMissingDBTables <- subset(dbTables, grepl(paste(tableCheckList, collapse = '|'), TABLE_NAME, ignore.case = T), select=TABLE_NAME)[,,drop=T] missingTableMessage <- if(length(missingTables)==0) { "All tables accounted for." } else{ paste("The following tables are missing:", paste(missingTables, collapse = ','), sep = ' ') } QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("1.01",missingTableMessage)
## Data Check 1.02: Required tables are not populated table_N_lst <- list() #vector(length=length(nonMissingDBTables)) for(i in 1:length(nonMissingDBTables)){ table_N_lst[[i]] <- run_query(paste("select distinct '_aaa_' as _aaa_, count(*) as tabN from ",nonMissingDBTables[i], sep=' '))#[,'tabN',drop=T] # print(dbTables[i]) if (class(table_N_lst[[i]]) !='data.frame') {table_N_lst[[i]] <- NULL} } #length(dbTables) table_N_lst <- do.call('rbind', table_N_lst) [,'tabN',drop=T] nonPopTables <- nonMissingDBTables[table_N_lst==0] nonPopMessage <- if(length(nonPopTables)==0) {"All tables populated, among those present." } else{ paste("The following tables exist, but are not populated:", paste(nonPopTables, collapse = ','), sep = ' ') } QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("1.02",nonPopMessage)
sqlFiles <- system.file('sql', package='chordsTables') schema_validation_query <- read_file(dir(sqlFiles, "VDW_Schema_Validation.sql", f=TRUE)) connection103 <- get_connection() schema_validation <- sqlQuery(connection103, schema_validation_query, as.is=TRUE) odbcClose(connection103)
## Data Check 1.03: Required fields are not present missing_columns <- subset(schema_validation, Result == "TABLE/VIEW OR COLUMN MISSING") missing_columnsTables <- select(missing_columns, c("TableName", "ColumnName")) if (params$QAAlert == TRUE & nrow(missing_columnsTables) > 0){ missing_columns$message <- paste0(missing_columns$TableName, ".", missing_columns$ColumnName, sep="") QA_Alert_Message_103_items <- paste0(missing_columns$message, collapse = ', ') QA_Alert_Message_103 <- paste0("The following required fields are not present: ", QA_Alert_Message_103_items) QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("1.03",QA_Alert_Message_103) }
misconfig <- subset(schema_validation, (Result == "CONFIG-MISMATCH" & ExpectedIsNullable == FoundIsNullable)) invalid_specs <- select(misconfig, c("TableName", "ColumnName", "ExpectedNumberPrecision", "FoundNumberPrecision", "ExpectedNumberScale", "FoundNumberScale", "ExpectedCharLength", "FoundCharLength", "ExpectedDatePrecision", "FoundDatePrecision")) if (params$QAAlert == TRUE & nrow(invalid_specs) > 0){ misconfig$message <- paste0(misconfig$TableName, ".", misconfig$ColumnName, sep="") QA_Alert_Message_104_items <- paste0(misconfig$message, collapse = ', ') QA_Alert_Message_104 <- paste0("The following required fields do not conform to data model specifications for data type, length, or name: ", QA_Alert_Message_104_items) QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("1.04",QA_Alert_Message_104) }
primary_key_data <- run_query(" /*TABLEREPLACEMENTINFILE*/ /***************************************************************************** Script for validating the VDW Primary Keys. It will create two lists. One of the Expected Tables and their primary keys and another of all the tables currently in the partner VDW. It will validate: - Compare the expected primary key to the primary key of each table in a partner's VDW - The number of records that currently violate the exepcted primary key FOR DEVELOPERS: To Update the script for partner use: 1) Edit the #PKVALIDATION table insertion if the primary keys of the VDW have changed or add new entires for new tables. One row per primary key column. Examples below: Output: One output table 1) Shows the Table name, the expected primary key, the found primary key, and the result of the comparision between the expected and found primary keys. KeyMatchResultValues: - OK: Expected primary key matches the found primary key - KEY MISMATCH: Expected Primary Key does not matc the found primary key - TABLE OR KEYS NOT FOUND: No table in the VDW matches the expected table - UNKNOWN: An unknow error occured ViolationCount: - The number of records that that appear to violate the expected primary key. *****************************************************************************/ /***************************************************************************** BEGIN TempTable Clearing and Creation *****************************************************************************/ SET ANSI_NULLS ON; SET QUOTED_IDENTIFIER ON; SET NOCOUNT ON; SET ANSI_WARNINGS OFF; BEGIN IF OBJECT_ID('tempdb..#PKVALIDATION') IS NOT NULL BEGIN DROP TABLE #PKVALIDATION; END; CREATE TABLE #PKVALIDATION ( Table_Name VARCHAR(250) , Column_Name VARCHAR(250) ); IF OBJECT_ID('tempdb..#KeyMatchResult') IS NOT NULL BEGIN DROP TABLE #KeyMatchResult; END; IF OBJECT_ID('tempdb..#PKVIOLATIONS') IS NOT NULL BEGIN DROP TABLE #PKVIOLATIONS; END; CREATE TABLE #PKVIOLATIONS ( Table_Name VARCHAR(250) , Violation_Count BIGINT ); END; /***************************************************************************** END TempTable Clearing and Creation *****************************************************************************/ /***************************************************************************** BEGIN Table Create Section *****************************************************************************/ INSERT INTO #PKVALIDATION VALUES ( 'CENSUS_DEMOG', 'CENSUS_YEAR'), ( 'CENSUS_DEMOG', 'GEOCODE'), ( 'EVERNDC', 'NDC'), ( 'EVERNDC', 'GENERIC'), ( 'PROVIDER_SPECIALTY', 'PROVIDER'), ( 'DEATH', 'PERSON_ID'), ( 'CAUSE_OF_DEATH', 'PERSON_ID'), ( 'CAUSE_OF_DEATH', 'COD'), ( 'DEMOGRAPHICS', 'PERSON_ID'), ( 'LINKAGE', 'LINK_ID'), ( 'LINKAGE', 'LINE'), ( 'BENEFIT', 'BENEFIT_ID'), ( 'ENCOUNTERS', 'ENC_ID'), ( 'DIAGNOSES', 'DIAGNOSES_ID'), ( 'ENROLLMENT', 'PERSON_ID'), ( 'ENROLLMENT', 'ENR_START'), ( 'LAB_RESULTS', 'LAB_RESULTS_ID'), ( 'PRO_SURVEYS', 'PRO_ID'), ( 'PRO_QUESTIONS', 'PRO_ID'), ( 'PRO_QUESTIONS', 'QUESTION_ID'), ( 'PRO_QUESTIONS', 'QUESTION_VER'), ( 'PRO_RESPONSES', 'RESPONSE_ID'), ( 'PHARMACY', 'PHARMACY_ID'), ( 'PRESCRIBING', 'PRESCRIBING_ID'), ( 'PROCEDURES', 'PROCEDURES_ID'), ( 'SOCIAL_HISTORY', 'SOCIAL_HISTORY_ID'), ( 'VITAL_SIGNS', 'VITAL_SIGNS_ID'), ( 'TUMOR', 'TUMOR_ID'), ( 'LANGUAGES', 'PERSON_ID'), ( 'LANGUAGES', 'LANG_ISO'), ( 'CENSUS_LOCATION', 'PERSON_ID'), ( 'CENSUS_LOCATION', 'LOC_START'); /***************************************************************************** BEGIN Table Name Replacement: If a TableName replacement table exists, it will swap out the names in the tables for the correct ones based on how it's mapped in their table. *****************************************************************************/ BEGIN IF OBJECT_ID('CHORDS_TABLENAMES') IS NOT NULL BEGIN UPDATE a SET a.Table_Name = b.NEW_NAME FROM #PKVALIDATION a JOIN CHORDS_TABLENAMES b ON b.ORG_NAME = a.Table_Name; END; END; /***************************************************************************** END Table Name Replacement *****************************************************************************/ /***************************************************************************** BEGIN Analysis Section: Compares the partner's primary keys to the expected keys ******************************************************************************/ SELECT * INTO #KeyMatchResult FROM ( SELECT ExpectKeys.TABLE_NAME, ExpectKeys.COLUMN_NAMES AS Expected_Primary_Key, CurrKeys.COLUMN_NAMES AS Found_Primary_Key, CASE WHEN ob.type = 'V' THEN 'VIEW FOUND' WHEN CurrKeys.TABLE_NAME IS NULL THEN 'TABLE/VIEW OR KEYS NOT FOUND' WHEN ExpectKeys.COLUMN_NAMES != CurrKeys.COLUMN_NAMES THEN 'KEY MISMATCH' WHEN ExpectKeys.COLUMN_NAMES = CurrKeys.COLUMN_NAMES THEN 'OK' ELSE 'UNKNOWN ERROR' END AS Key_Match_Result FROM ( SELECT TABLE_NAME, LEFT(COL, LEN(COL) - 1) AS COLUMN_NAMES FROM ( SELECT DISTINCT TAB.TABLE_NAME TABLE_NAME, ( SELECT COL.Column_Name + ', ' AS [text()] FROM #PKVALIDATION COL WHERE COL.Table_Name = TAB.Table_Name ORDER BY COL.Column_Name FOR XML PATH('') ) COL FROM #PKVALIDATION TAB ) T WHERE T.COL IS NOT NULL ) ExpectKeys LEFT JOIN ( SELECT TABLE_NAME, LEFT(COL, LEN(COL) - 1) AS COLUMN_NAMES FROM ( SELECT DISTINCT TAB.TABLE_NAME TABLE_NAME, ( SELECT COL.COLUMN_NAME + ', ' AS [text()] FROM INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE COL WHERE COL.CONSTRAINT_NAME = TAB.CONSTRAINT_NAME AND COL.TABLE_NAME = TAB.TABLE_NAME AND CONSTRAINT_TYPE = 'PRIMARY KEY' ORDER BY COL.COLUMN_NAME FOR XML PATH('') ) COL FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS TAB ) T WHERE T.COL IS NOT NULL ) CurrKeys ON CurrKeys.TABLE_NAME = ExpectKeys.TABLE_NAME LEFT JOIN sys.objects ob ON ob.object_id = OBJECT_ID(ExpectKeys.TABLE_NAME) and ob.type in ('U', 'V')) KeyMatch; BEGIN DECLARE @SQL NVARCHAR(3000); DECLARE @Table_Name VARCHAR(100); DECLARE @Key_Columns VARCHAR(100); DECLARE CUR CURSOR FOR SELECT Table_Name, Expected_Primary_Key FROM #KeyMatchResult; OPEN CUR; FETCH NEXT FROM CUR INTO @Table_Name, @Key_Columns; WHILE @@FETCH_STATUS = 0 BEGIN SET @SQL = ' IF OBJECT_ID(''' + @Table_Name + ''') IS NOT NULL WITH CTEKEY AS (SELECT COUNT(*) KEYTOT FROM ( SELECT DISTINCT ' + @Key_Columns + ' FROM ' + @Table_Name + ') z), CTETOT AS (SELECT COUNT(*) TABTOT FROM ' + @Table_Name + ') INSERT INTO #PKVIOLATIONS SELECT ''' + @Table_Name + ''', CTETOT.TABTOT - CTEKEY.KEYTOT FROM CTETOT, CTEKEY;' EXEC Sp_executesql @SQL; FETCH NEXT FROM CUR INTO @Table_Name, @Key_Columns; END; CLOSE CUR; DEALLOCATE CUR; END SELECT m.*, v.Violation_Count FROM #KeyMatchResult m LEFT JOIN #PKVIOLATIONS v ON v.Table_Name = m.TABLE_NAME; /***************************************************************************** END Analysis Section *****************************************************************************/")
violated_primary_keys <- subset(primary_key_data, Key_Match_Result != "OK") if (params$QAAlert == TRUE & nrow(violated_primary_keys) > 0){ violated_primary_keys_table <- subset(violated_primary_keys, (Violation_Count > 0 & Key_Match_Result == "VIEW FOUND") | (Key_Match_Result != "VIEW FOUND")) violated_primary_keys_table$message <- paste0(violated_primary_keys_table$TABLE_NAME, sep="") QA_Alert_Message_105_items <- paste0(violated_primary_keys_table$message, collapse = ', ') QA_Alert_Message_105 <- paste0("The following tables have primary key definition errors: ", QA_Alert_Message_105_items) QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("1.05",QA_Alert_Message_105) }
sqlFiles <- system.file('sql', package='chordsTables') data_validation_query <- read_file(dir(sqlFiles, "VDW_DataValidation.sql", f=TRUE)) connection106 <- get_connection() results <- sqlQuery(connection106, data_validation_query, as.is=TRUE) data_validation <- sqlQuery(connection106, " SELECT * FROM #CHORDSDataValueResults ORDER BY TargetTable; ", as.is = TRUE) ref_integrity <- sqlQuery(connection106, " SELECT * FROM #CHORDSReferentialIntegrityResults ORDER BY TargetTable; ", as.is = TRUE) odbcClose(connection106) if (params$QAAlert == TRUE & nrow(data_validation) > 0){ data_validation_table <- data.frame(data_validation, stringsAsFactors = FALSE) data_validation_table$message <- paste0(data_validation_table$TargetTable, ".", data_validation_table$TargetColumn, sep="") QA_Alert_Message_106_items <- paste0(data_validation_table$message, collapse = ', ') QA_Alert_Message_106 <- paste0("The following required fields contain values outside of data model specifications: ", QA_Alert_Message_106_items) QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("1.06", QA_Alert_Message_106) }
nullableValues = subset(misconfig, (Result == "CONFIG-MISMATCH" & ExpectedIsNullable != FoundIsNullable)) nullableValues_Table <- select(nullableValues, c("TableName", "ColumnName", "ExpectedIsNullable", "FoundIsNullable")) if (params$QAAlert == TRUE & nrow(nullableValues_Table) > 0){ nullableValues_Table$message <- paste0(nullableValues_Table$TableName, ".", nullableValues_Table$ColumnName, sep="") QA_Alert_Message_107_items <- paste0(nullableValues_Table$message, collapse = ', ') QA_Alert_Message_107 <- paste0("The following required fields have non-permissible missing values: ", QA_Alert_Message_107_items) QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("1.07",QA_Alert_Message_107) }
orphan_person_ids <- subset(ref_integrity, ((TargetColumn == "PERSON_ID" | ReferenceColumn == "PERSON_ID") & ValuesNotFound > 0)) orphan_person_ids_table <- select(orphan_person_ids, c("TargetTable", "TargetColumn", "ReferenceTable", "ReferenceColumn", "ValuesNotFound", "TargetTableDistinctCount", "PercentOfDistinctTargetColumnMissing")) if (params$QAAlert == TRUE & nrow(orphan_person_ids_table) > 0){ orphan_person_ids_sub_table <- data.frame(orphan_person_ids_table, stringsAsFactors = FALSE) orphan_person_ids_sub_table$message <- paste0(orphan_person_ids_sub_table$TargetTable, sep="") QA_Alert_Message_108_items <- paste0(orphan_person_ids_sub_table$message, collapse = ', ') QA_Alert_Message_108 <- paste0("The following tables contain orphan PERSON_IDs: ", QA_Alert_Message_108_items) QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("1.08",QA_Alert_Message_108) }
orphan_enc_ids <- subset(ref_integrity, (TargetColumn == "ENC_ID" | ReferenceColumn == "ENC_ID") & ValuesNotFound > 0) orphan_enc_ids_table <- select(orphan_enc_ids, c("TargetTable", "TargetColumn", "ReferenceTable", "ReferenceColumn", "ValuesNotFound", "TargetTableDistinctCount", "PercentOfDistinctTargetColumnMissing")) if (params$QAAlert == TRUE & nrow(orphan_enc_ids_table) > 0){ orphan_person_ids_sub_table <- data.frame(orphan_enc_ids_table, stringsAsFactors = FALSE) orphan_person_ids_sub_table$message <- paste0(orphan_person_ids_sub_table$TargetTable, sep="") QA_Alert_Message_109_items <- paste0(orphan_person_ids_sub_table$message, collapse = ', ') QA_Alert_Message_109 <- paste0("The following tables contain orphan ENCOUNTER_IDs: ", QA_Alert_Message_109_items) QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("1.09",QA_Alert_Message_109) }
repErrors <- run_query( paste0( " select 'Diagnosis' as tablename, count(*) as nrows, sum(case when a.person_id <> b.person_id then 1 else 0 end) as Person_ID, sum(case when a.adate <> b.adate then 1 else 0 end) as Adate, sum(case when a.enctype <> b.enctype then 1 else 0 end) as EncType from ", encounters, " as a inner join (select distinct enc_id, person_id, adate, enctype from ", diagnoses, ") as b on a.enc_id = b.enc_id union select 'Procedure' as tablename, count(*) as nrows, sum(case when a.person_id <> b.person_id then 1 else 0 end) as Person, sum(case when a.adate <> b.adate then 1 else 0 end) as Adate, sum(case when a.enctype <> b.enctype then 1 else 0 end) as EncType from ", encounters, " as a inner join (select distinct enc_id, person_id, adate, enctype from ", procedures, ") as b on a.enc_id = b.enc_id " )) repErrors_tx <- tidyr::gather(repErrors, 'field','n_bad',Person_ID, Adate, EncType) %>% arrange(tablename) %>% within({ NP_bad<- paste0(n_bad,' (',round(100*n_bad/nrows, 2),')') }) QA_Alert_Message_1.10 <- if(nrow(subset(repErrors, n_bad>0))>0) {"There are Replication errors between the ENCOUNTER, PROCEDURES and DIAGNOSIS tables"} else { "There were no Replication errors between the ENCOUNTER, PROCEDURES and DIAGNOSIS tables found." } QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("1.10",QA_Alert_Message_1.10)
GR1Person <- run_query( paste0( " select 'Encounter' as tablename, count(*) as n_encid, sum(case when N_person>1 then 1 else 0 end) as N_gr1 from ( select enc_id, count(distinct person_id) as N_person from ", encounters," group by enc_id ) as qry " )) GR1Person <- within(GR1Person, { pct <- round(100*N_gr1/n_encid, 2) }) #if(GR1Person$pct>5){ # print(paste0("WARNING: The encounter table has more than 5% of encounters assigned to more than 1 person (",GR1Person$pct,"%)")) #} else{ # print(paste0("The encounter table has ", GR1Person$pct,"% of encounters assigned to more than 1 person")) #} encItoManyMsg <- if(GR1Person$pct>5){ paste0("WARNING: The encounter table has more than 5% of encounters assigned to more than 1 person (",GR1Person$pct,"%)") } else{ paste0("The encounter table has ", GR1Person$pct,"% of encounters assigned to more than 1 person") } QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("1.11",encItoManyMsg)
orphan_provider_ids <- subset(ref_integrity, ((TargetColumn %in% c("RXMD", "PROVIDER") | ReferenceColumn %in% c("RXMD", "PROVIDER"))) & ValuesNotFound > 0) orphan_provider_ids_table <- select(orphan_provider_ids, c("TargetTable", "TargetColumn", "ReferenceTable", "ReferenceColumn", "ValuesNotFound", "TargetTableDistinctCount", "PercentOfDistinctTargetColumnMissing")) if (params$QAAlert == TRUE & nrow(orphan_provider_ids_table) > 0){ orphan_provider_ids_sub_table <- data.frame(orphan_provider_ids_table, stringsAsFactors = FALSE) orphan_provider_ids_sub_table$message <- paste0(orphan_provider_ids_sub_table$TargetTable, sep="") QA_Alert_Message_112_items <- paste0(orphan_provider_ids_sub_table$message, collapse = ', ') QA_Alert_Message_112 <- paste0("The following tables contain orphan PROVIDER_IDs: ", QA_Alert_Message_112_items) QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("1.12",QA_Alert_Message_112) }
connection113 <- get_connection() code_conform_table_counts <- sqlQuery(connection113, paste0(" /* - DIAGNOSES: *ICD09:length is not between 3-5 OR has alpha characters other than E or V OR has no numeric characters OR first 3 digits (min length) are 0; *ICD10: length is not between 3 and 7 OR 1st character is not alpha OR first 3 digits (min length) are 0 or 9 OR has no numeric characters; - PROCEDURES: *CPT/HCPCS are length <5 OR first 5 are all 0 or 9 OR no numeric; *ICD9 is not length 3 or 4 OR any alpha OR all 0; *ICD10 is not length 7 OR no numeric OR all 7 are 0s or 9s; - PRESCRIBING: *flag codes with any alphabetical characters OR a length <2 or >7; - PHARMACY: *length not 11 OR any alpha OR a string of 0 or 9; - LAB_RESULT *flag for any alphabetical characters OR a length less than 3 or greater than 7 OR the absence of a dash after the next to last position; */ SET NOCOUNT ON; DROP TABLE IF EXISTS #VdwInvalidCodes; SELECT * INTO #VdwInvalidCodes FROM ( SELECT a.TableName, a.CodeType, a.Code, a.ValidResult FROM ( SELECT 'DIAGNOSES' AS TableName, DX AS Code, DX_CODETYPE AS CodeType, CASE WHEN LEN(replace(DX, '.', '')) NOT BETWEEN 3 AND 7 THEN 'Code Length must be between 3 and 7 characters (excluding \".\")' WHEN DX NOT LIKE '[A-TV-Z]%' THEN 'Starting character must be a letter and not \"U\"' WHEN SUBSTRING(DX, 1, 3) IN('000', '999') THEN 'Invalid Numeric Value Range' WHEN DX NOT LIKE '%[0-9]%' THEN 'Values must include a numerical component' ELSE 'OK' END AS ValidResult FROM ", diagnoses, " WHERE DX_CODETYPE = '10' UNION ALL SELECT 'DIAGNOSES' AS TableName, DX AS Code, DX_CODETYPE AS CodeType, CASE WHEN LEN(replace(DX, '.', '')) NOT BETWEEN 3 AND 5 THEN 'Length must be between 3 and 5 characters (excluding \".\")' WHEN(DX NOT LIKE '[EV]%' AND DX NOT LIKE '[0-9]%') THEN 'Starting Character must be \"E\", \"V\", or a number' WHEN SUBSTRING(DX, 1, 3) IN('000') THEN 'Invalid Numeric Value Range' WHEN DX NOT LIKE '%[0-9]%' THEN 'Numeric Values Not Detected' ELSE 'OK' END AS ValidResult FROM ", diagnoses, " WHERE DX_CODETYPE = '09' UNION ALL SELECT 'PROCEDURES' AS TableName, PX AS Code, PX_CODETYPE AS CodeType, CASE WHEN LEN(PX) < 5 THEN 'Code length must be less than 5 characters' WHEN SUBSTRING(PX, 1, 5) IN('00000', '99999') THEN 'Invalid Numeric Value' WHEN TRY_PARSE(PX AS INT) IS NULL THEN 'Non-numeric Characters Detected' ELSE 'OK' END AS ValidResult FROM ", procedures, " WHERE PX_CODETYPE = 'C4' UNION ALL SELECT 'PROCEDURES' AS TableName, PX AS Code, PX_CODETYPE AS CodeType, CASE WHEN LEN(replace(PX, '.', '')) NOT BETWEEN 3 AND 4 THEN 'Code length must be between 3 and 4 characters' WHEN PX NOT LIKE '[0-9]%' THEN 'Invalid Start Character' WHEN PX IN('00000') THEN 'Invalid Numeric Value' ELSE 'OK' END AS ValidResult FROM ", procedures, " WHERE PX_CODETYPE = '09' UNION ALL SELECT 'PROCEDURES' AS TableName, PX AS Code, PX_CODETYPE AS CodeType, CASE WHEN LEN(PX) != 7 THEN 'Code Length Must Equal 7' WHEN PX NOT LIKE '%[0-9]%' THEN 'Code values must include a numerical component' WHEN PX IN('0000000', '9999999') THEN 'Invalid Numeric Value' ELSE 'OK' END AS ValidResult FROM ", procedures," WHERE PX_CODETYPE = '10' UNION ALL SELECT 'PRESCRIBING' AS TableName, RXNORM AS Code, 'RXNORM' AS CodeType, CASE WHEN RXNORM LIKE '%[A-Z]%' THEN 'Code cannot contain alphabetical characters' WHEN LEN(RXNORM) NOT BETWEEN 2 AND 7 THEN 'Code length must be between 2 and 7 characters' ELSE 'OK' END AS ValidResult FROM ", prescribing, " UNION ALL SELECT 'PHARMACY' AS TableName, NDC AS Code, 'NDC' AS CodeType, CASE WHEN NDC LIKE '%[A-Z]%' THEN 'Code cannot contain alphabetical characters' WHEN LEN(NDC) != 11 THEN 'Code length must be 11 characters' WHEN NDC IN('00000000000', '99999999999') THEN 'Invalid Numeric Value' ELSE 'OK' END AS ValidResult FROM ", pharmacy, " UNION ALL SELECT 'LAB_RESULTS' AS TableName, LOINC AS Code, 'LOINC' AS CodeType, CASE WHEN LOINC LIKE '%[A-Z]%' THEN 'Code cannot contain alphabetical characters' WHEN LEN(LOINC) NOT BETWEEN 3 AND 7 THEN 'Code length must be between 2 and 7 characters' WHEN SUBSTRING(LEFT(REVERSE(RTRIM(LTRIM(LOINC))), 2), 2, 2) != '-' THEN 'No hyphen character in the second to last position' ELSE 'OK' END AS ValidResult FROM ", lab_results, " ) a WHERE a.validResult != 'OK' ) InvalidCodes; WITH CTE_CodeCounts AS (SELECT 'DIAGNOSES' AS TableName, DX_CODETYPE AS CodeType, COUNT(1) CountAll FROM ", diagnoses, " GROUP BY DX_CODETYPE UNION ALL SELECT 'PROCEDURES' AS TableName, PX_CodeType AS CodeType, COUNT(1) CountAll FROM ", procedures, " GROUP BY PX_CodeType UNION ALL SELECT 'PRESCRIBING' AS TableName, 'RXNORM' AS CodeType, COUNT(1) CountAll FROM ", prescribing, " UNION ALL SELECT 'PHARMACY' AS TableName, 'NDC' AS CodeType, COUNT(1) CountAll FROM ", pharmacy, " UNION ALL SELECT 'LAB_RESULTS' AS TableName, 'LOINC' AS CodeType, COUNT(1) CountAll FROM ", lab_results, " ) SELECT b.TableName, b.CodeType, cc.CountAll, b.CountInvalid AS CountInvalid, ROUND(CAST(b.CountInvalid AS DECIMAL) / CAST(cc.CountAll AS DECIMAL) * CAST(100.0 AS DECIMAL), 2) AS PercentInvalid FROM ( SELECT z.TableName, z.CodeType, COUNT(1) AS CountInvalid FROM #VdwInvalidCodes z GROUP BY z.TableName, z.CodeType ) b JOIN CTE_CodeCounts cc ON cc.TableName = b.TableName AND cc.CodeType = b.CodeType ORDER BY b.TableName, b.CodeType; ")) top_50_invalid <- sqlQuery(connection113, " SELECT TableName, CodeType, Code, ValidResult, CountInvalid FROM ( SELECT TableName, CodeType, Code, ValidResult, CountInvalid, ROW_NUMBER() OVER(PARTITION BY TableName ORDER BY CountInvalid DESC) RowNum FROM ( SELECT z.TableName, z.CodeType, z.Code, z.ValidResult, COUNT(1) AS CountInvalid FROM #VdwInvalidCodes z GROUP BY z.TableName, z.CodeType, z.Code, z.ValidResult ) InvalidCount ) Top50 WHERE RowNum <= 50 ORDER BY TableName, CodeType, CountInvalid Desc; ") odbcClose(connection113) if (params$QAAlert == TRUE){ code_conform_table_counts_table <- subset(code_conform_table_counts, PercentInvalid > 5.0) if (!rlang::is_empty(code_conform_table_counts_table)){ code_conform_table_counts_table$message <- paste0(code_conform_table_counts_table$TableName, sep="") QA_Alert_Message_113_items <- paste0(code_conform_table_counts_table$message, collapse = ', ') QA_Alert_Message_113 <- paste0("More than 5% of ICD, CPT, LOINC, RXCUI, or NDC codes do not conform to the expected length or content: ", QA_Alert_Message_113_items) QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("1.13",QA_Alert_Message_113) } } knitr::kable(code_conform_table_counts, row.names = FALSE, col.names =c("Table Name", "Code Type", "Count All", "Count Invalid", "% Invalid"))
\newline
# DC 2.01 tabDates <- run_query(" SELECT DISTINCT a.name AS tabname, b.name AS colname FROM sys.objects a INNER JOIN sys.columns b ON a.object_id = b.object_id WHERE type IN('U', 'V') ORDER BY a.name;") %>% subset(grepl('date',colname, ignore.case = T)) modDates <- run_query("select name, modify_date from sys.objects WHERE type IN('U', 'V')") futureDateCode <- merge(tabDates, modDates, by.x='tabname', by.y='name') %>% within({ sqlcode <- paste0("select '",tabname, "' as tableName, '", colname,"' as dateName ,count(*) as nrows, sum(case when a.",colname,">b.modify_date then 1 else 0 end) as futureDate from ", tabname, " as a , (select modify_date from sys.objects where type IN('U', 'V') AND name ='",tabname,"') as b" ) }) #test <- run_query(futureDateCode[1, "sqlcode"]) fdatelst <- list() for (i in 1:nrow(futureDateCode)){ fdatelst[[i]] <- run_query(futureDateCode[i, "sqlcode"]) } fdatesdone <- do.call('rbind', fdatelst) %>% within(.,{ pctFuture <- round(100*futureDate/nrows, 4) }) #params$QAAlert<-F ### add message if no table is a problem futureDataMsg <- NULL futureDataMsg <- if(nrow(subset(fdatesdone, pctFuture>=5))>0 ){ paste("The follosing tables exceeded the 5% limit on future dates:", subset(fdatesdone, pctFuture>=5)$tableName, sep = ' ') } else if(nrow(subset(fdatesdone, pctFuture>=5))==0 ) { "No table exceeds the 5% limit on future dates." } QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("2.01",futureDataMsg)
# select count(*) as personCount, sum(case when (minage not between 0 and 89) or (maxage not between 0 and 89) then 1 else 0 end ) as N_outOfRange patAgeRange <- run_query(paste0(" select count(*) as nrows, sum(case when (minage <0) or (maxage <0) then 1 else 0 end ) as N_low, sum(case when (minage >=90) or (maxage >=90) then 1 else 0 end ) as N_high from ( select distinct b.*, datediff(YY, a.birth_date, b.first_dt) as minage, datediff(YY, a.birth_date, b.last_dt) as maxage from ",demographics, " as a inner join (select person_id, min(adate) as first_dt, max(adate) as last_dt from ",encounters, " group by person_id ) as b on a.person_id = b.person_id where a.birth_date is not null ) as qry ")) htOutOfRange <- run_query(paste0(" select count(*) as nrows, sum(case when ht <0.0 then 1 else 0 end ) as N_low, sum(case when ht>=95.0 then 1 else 0 end ) as N_high from ", vital_signs, " where ht is not null ")) wtOutOfRange <- run_query(paste0(" select count(*) as nrows, sum(case when wt <0.0 then 1 else 0 end ) as N_low, sum(case when wt>350 then 1 else 0 end ) as N_high from ", vital_signs, " where wt is not null ")) dbpOutOfRange <- run_query(paste0(" select count(*) as nrows, sum(case when diastolic <40 then 1 else 0 end ) as N_low, sum(case when diastolic>120 then 1 else 0 end ) as N_high from ", vital_signs, " where diastolic is not null ")) sbpOutOfRange <- run_query(paste0(" select count(*) as nrows, sum(case when systolic <40 then 1 else 0 end ) as N_low, sum(case when systolic>210 then 1 else 0 end ) as N_high from ", vital_signs, " where systolic is not null ")) daysSupplyOutOfRange <- run_query(paste0(" select count(*) as nrows, sum(case when rx_days_supply <1 then 1 else 0 end ) as N_low, sum(case when rx_days_supply>90 then 1 else 0 end ) as N_high from ", prescribing, " where rx_days_supply is not null ")) allOutOfRange <- rbind( cbind(table='Demographic/Encounter', item='Age (people)', low='< 0 yrs',high='> 89 yrs', patAgeRange), cbind(table='Vital_signs',item='Height (records)',low= '< 0 inches',high='> 0 inches', htOutOfRange), cbind(table='Vital_signs',item='Weight (records)',low= '< 0 pounds',high='> 350 pounds', wtOutOfRange), cbind(table='Vital_signs',item='Diastolic BP (records)',low= '< 40 mgHg',high='> 120 mgHg', dbpOutOfRange), cbind(table='Vital_signs',item='Systolic BP (records)',low= '< 40 mgHg',high='> 210 mgHg', sbpOutOfRange), cbind(table='Prescribing',item='Prescribed days supply (records)',low= '< 1 day',high='> 90 days', daysSupplyOutOfRange) ) allOutOfRange <- within(allOutOfRange, { NP_low = paste0(N_low,' (',round(100*N_low/nrows, 2),')') NP_high = paste0(N_high,' (',round(100*N_high/nrows, 2),')') }) #knitr::kable( #subset(allOutOfRange, select=c(table,item, low, high, nrows, NP_low, NP_high)), #col.names = c('Table','Field','Check Low','High','N','Low values N (%)','High values N (%)') #) outOfRangeMsg <- NULL outOfRangeMsg <- if(nrow(subset(allOutOfRange, (N_low+N_high)/nrows>=0.1))>0 ){ paste('More than 10% of records fall into the lowest or highest categories of age, height, weight, diastolic, blood pressure, systolic blood pressure, or dispensed days supply:', subset(allOutOfRange, (N_low+N_high)/nrows>=0.1)$item, sep=' ') } else if(nrow(subset(allOutOfRange, (N_low+N_high)/nrows>=0.1))==0 ) { "No table exceeds the 5% limit." } QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("2.02",futureDataMsg)
encTabBirth2.03 <- run_query(paste0(" select sum(dobPostAdate) as dobPostAdate, sum(dobPostDdate) as dobPostDdate from ( select a.person_id, max(case when datediff(day, a.birth_Date, b.adate)<0 and b.adate is not null then 1 else 0 end) as dobPostAdate, max(case when datediff(day, a.birth_Date, b.ddate)<0 and b.ddate is not null then 1 else 0 end) as dobPostDdate from (select distinct person_id, birth_date from ",demographics, " where birth_date is not null) as a inner join ", encounters, " as b on a.person_id = b.person_id group by a.person_id ) qry ") ) encTabDeath2.03 <- run_query(paste0(" select sum(deathPreAdate) as deathPreAdate, sum(deathPreDdate) as deathPreDdate from ( select a.person_id, max(case when datediff(day, a.deathdt, b.adate)>0 and b.adate is not null then 1 else 0 end) as deathPreAdate, max(case when datediff(day, a.deathdt, b.ddate)>0 and b.ddate is not null then 1 else 0 end) as deathPreDdate from (select distinct person_id, deathdt from ",death, " where deathdt is not null) as a inner join ", encounters, " as b on a.person_id = b.person_id group by a.person_id ) qry ") ) procTabBirth2.03 <- run_query(paste0(" select sum(dobPostProc) as dobPostProc from ( select a.person_id, max(case when datediff(day, a.birth_Date, procdate)<0 and b.procdate is not null then 1 else 0 end) as dobPostProc from (select distinct person_id, birth_date from ",demographics, " where birth_date is not null) as a inner join ", procedures, " as b on a.person_id = b.person_id group by a.person_id ) qry ") ) procTabDeath2.03 <- run_query(paste0(" select sum(deathPreProc) as deathPreProc from ( select a.person_id, max(case when datediff(day, a.deathdt, b.procdate)>0 and b.procdate is not null then 1 else 0 end) as deathPreProc from (select distinct person_id, deathdt from ",death, " where deathdt is not null) as a inner join ", procedures, " as b on a.person_id = b.person_id group by a.person_id ) qry ") ) vitalsTabBirth2.03 <- run_query(paste0(" select sum(dobPostMeasure) as dobPostMeasure from ( select a.person_id, max(case when datediff(day, a.birth_Date, b.measure_date)<0 and b.measure_date is not null then 1 else 0 end) as dobPostMeasure from (select distinct person_id, birth_date from ",demographics, " where birth_date is not null) as a inner join ", vital_signs, " as b on a.person_id = b.person_id group by a.person_id ) qry ") ) vitalsTabDeath2.03 <- run_query(paste0(" select sum(deathPreMeasure) as deathPreMeasure from ( select a.person_id, max(case when datediff(day, a.deathdt, b.measure_date)>0 and b.measure_date is not null then 1 else 0 end) as deathPreMeasure from (select distinct person_id, deathdt from ",death, " where deathdt is not null) as a inner join ", vital_signs, " as b on a.person_id = b.person_id group by a.person_id ) qry ") ) pharmTabBirth2.03 <- run_query(paste0(" select sum(dobPostRx) as dobPostRx from ( select a.person_id, max(case when datediff(day, a.birth_Date, b.rxdate)<0 and b.rxdate is not null then 1 else 0 end) as dobPostRx from (select distinct person_id, birth_date from ",demographics, " where birth_date is not null) as a inner join ", pharmacy, " as b on a.person_id = b.person_id group by a.person_id ) qry ") ) pharmTabDeath2.03 <- run_query(paste0(" select sum(deathPreRx) as deathPreRx from ( select a.person_id, max(case when datediff(day, a.deathdt, b.rxdate)>0 and b.rxdate is not null then 1 else 0 end) as deathPreRx from (select distinct person_id, deathdt from ",death, " where deathdt is not null) as a inner join ", pharmacy, " as b on a.person_id = b.person_id group by a.person_id ) qry ") ) prescribTabBirth2.03 <- run_query(paste0(" select sum(dobPostRxStart) as dobPostRxStart from ( select a.person_id, max(case when datediff(day, a.birth_Date, b.rx_start_date)<0 and b.rx_start_date is not null then 1 else 0 end) as dobPostRxStart from (select distinct person_id, birth_date from ",demographics, " where birth_date is not null) as a inner join ", prescribing, " as b on a.person_id = b.person_id group by a.person_id ) qry ") ) prescribTabDeath2.03 <- run_query(paste0(" select sum(deathPreRxStart) as deathPreRxStart from ( select a.person_id, max(case when datediff(day, a.deathdt, b.rx_start_date)>0 and b.rx_start_date is not null then 1 else 0 end) as deathPreRxStart from (select distinct person_id, deathdt from ",death, " where deathdt is not null) as a inner join ", prescribing , " as b on a.person_id = b.person_id group by a.person_id ) qry ") ) labTabBirth2.03 <- run_query(paste0(" select sum(dobPostResult) as dobPostResult from ( select a.person_id, max(case when datediff(day, a.birth_Date, b.result_dt)<0 and b.result_dt is not null then 1 else 0 end) as dobPostResult from (select distinct person_id, birth_date from ",demographics, " where birth_date is not null) as a inner join ", lab_results, " as b on a.person_id = b.person_id group by a.person_id ) qry ") ) labTabDeath2.03 <- run_query(paste0(" select sum(deathPreResult) as deathPreResult from ( select a.person_id, max(case when datediff(day, a.deathdt, b.result_dt)>0 and b.result_dt is not null then 1 else 0 end) as deathPreResult from (select distinct person_id, deathdt from ",death, " where deathdt is not null) as a inner join ", lab_results, " as b on a.person_id = b.person_id group by a.person_id ) qry ") ) DeathBeforeBirth <- run_query(paste0(" select sum(deathPreBirth) as deathPreBirth from ( select a.person_id, max(case when datediff(day, a.deathdt, b.birth_date)>0 then 1 else 0 end) as deathPreBirth from (select distinct person_id, deathdt from ",death, " where deathdt is not null) as a inner join (select distinct person_id, birth_date from ",demographics, " where birth_date is not null) as b on a.person_id = b.person_id group by a.person_id ) qry ") ) patsInEncTab <- run_query( paste0("select count(distinct person_id) as n from ",encounters)) table2.03 <- cbind( encTabBirth2.03, encTabDeath2.03, procTabBirth2.03, procTabDeath2.03, vitalsTabBirth2.03, vitalsTabDeath2.03, pharmTabBirth2.03, pharmTabDeath2.03, prescribTabBirth2.03, prescribTabDeath2.03, labTabBirth2.03, labTabDeath2.03, DeathBeforeBirth ) labels2.03 <- data.frame(name=c( "dobPostAdate","dobPostDdate","deathPreAdate","deathPreDdate","dobPostProc","deathPreProc","dobPostMeasure","deathPreMeasure","dobPostRx","deathPreRx","dobPostRxStart","deathPreRxStart","dobPostResult","deathPreResult","deathPreBirth" ), newlabel = c('Adate<birth_date','Ddate<birth_date','Deathdt<adate','Deathdt<ddate','Procdate<birth_date','ProcDate>deathdt','Measure_date<birth_date','Measure_date>deathdt','rxdate<birth_date','rxdate>deathdt','rx_start_dt<birth_date','rx_start_dt>deathdt','Result_date<birth_date','Result_date>deathdt','Deathdt<birth_date') , srctab = c('Encounters','Encounters','Encounters','Encounters','Procedures','Procedures','Vital_signs','Vital_signs','Pharmacy','Pharmacy','Prescribing','Prescribing','lab_results','lab_results','Demographics and death') ) table2.03_tx <- tidyr::gather(table2.03, "comp", 'n') %>% within({ pct <- round(100*n/patsInEncTab$n, 2) }) table2.03_tx$ord <- 1:nrow(table2.03_tx) table2.03_tx <- table2.03_tx%>% merge(labels2.03, by.x='comp',by.y='name', all.x=T) %>% arrange(ord) #knitr::kable(table2.03_tx[,c('ord','comp','n','pct', 'srctab')], col.names = c(' ','Date comparison','N Patients','% of unique patients in the encounter table','Source table')) #params$QAAlert <- T illogicalDatesMsg <- NULL illogicalDatesMsg <- if(nrow(subset(table2.03_tx, pct>=5))>0){ paste("More than 5% of patients have illogical date relationships:", subset(table2.03_tx, pct>=5)$comp, sep=' ') } else if(nrow(subset(fdatesdone, pctFuture>=5))==0 ) { "No table exceeds the 5% limit on illogical dates." } QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("2.03",illogicalDatesMsg)
# to implement a new check add a new row to the following table: # table = the standard name of the table in the database (it will automaticaly correct for you if the actual table names are not standard) # var = the name of the variable to be chacked # condition = a valid logical condition ( in MS sql server language) which identifies a bad value # where = a valid where clause to subset the table, be sure to start with "WHERE", see example for ddate check3.03 <- rbind( data.frame(table='demographics', var='birth_date', condition='is null', where=' '), data.frame(table='demographics', var='gender', condition="not in ('M','F','O')", where=' '), data.frame(table='encounters', var='discharge_disposition', condition="not in ('A','E')", where=' '), data.frame(table='encounters', var='ddate', condition="is null", where="where enctype = 'IP'"), data.frame(table='procedures', var='procdate', condition='is null', where=' '), data.frame(table='prescribing', var='rx_order_date', condition='is null', where=' '), data.frame(table='pharmacy', var='not(rxsup>1 or rxamt>1)', condition=' ', where=' '), # compicated by it depending on two variables #data.frame(table='cause_of_death', var='source', condition="not in ('S', 'N', 'T', 'B', 'L', 'U', 'O')"), data.frame(table='diagnoses', var='dx_origin', condition="not in ('OD', 'BI', 'CL', 'PR', 'NI', 'OT')", where=' '), data.frame(table='diagnoses', var='enc_id', condition="is null", where=' '), data.frame(table='procedures', var='enc_id', condition="is null", where=' '), data.frame(table='vital_signs', var='enc_id', condition="is null", where=' ') ) res3.03 <- list() for(i in 1:nrow(check3.03)){ res3.03[[i]] <- with(check3.03, {run_query( paste("select '",table[i],"' as dataTable,'",var[i],"' as variable, count(*) as N_rows, sum(case when ",var[i],condition[i],"then 1 else 0 end) as N_bad", "from",eval(as.name(tolower(table[i]))), where[i] , sep=' ' ) )} ) } result3.03 <- do.call('rbind', res3.03) %>% within(.,{ pctBad <- round(100*N_bad/N_rows, 2) }) check3.03Msg <- NULL check3.03Msg <- if(nrow(subset(result3.03, pctBad>=10))>0){ paste("More than 10% of records have missing or unknown values for the following fields Alert:",subset(result3.03, pctBad>=10)$variable , sep = ' ' ) } else if(nrow(subset(fdatesdone, pctFuture>=5))==0 ) { "No check exceeds the 10% limit on bad values." } QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("3.03",check3.03Msg)
patsWithEncs <- run_query(paste0("select count(*) as nrows, sum(patsWEncDiag) as patsWEncDiag, sum(patsWEncProc) as patsWEncProc from ( select case when person_id in (select person_id from ", diagnoses, ") then 1 else 0 end as patsWEncDiag", ",case when person_id in (select person_id from ", procedures, ") then 1 else 0 end as patsWEncProc", " from ", demographics, " where person_id in (select person_id from ", encounters, ") ) as q") ) patsWithEncs <- within(patsWithEncs, { pctWEncDiag <- 100* patsWEncDiag/nrows pctWEncproc <- 100* patsWEncProc/nrows }) patDiagMsg <- with (patsWithEncs,{ if(pctWEncDiag<50 ){ paste0('WARNING: Only ',round(pctWEncDiag,1),'% of patients with encounters have diagnoses') } else { paste0('NOTE: ',round(pctWEncDiag,1),'% of patients with encounters have diagnoses') } }) patProcMsg <- with (patsWithEncs,{ if(pctWEncproc<50 ){ paste0('WARNING: Only ',round(pctWEncproc,1),'% of patients with encounters have procedures') } else { paste0('NOTE: ',round(pctWEncproc,1),'% of patients with encounters have procedures') } }) QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("3.04",patProcMsg) QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("3.05",patDiagMsg)
ipedei_no_principal_diag <- run_query(paste0("SELECT InpatientEnc, InpatientEnc - InpatientEncWPrincipalDiag AS InpatientEncWOPrincipalDiag, CAST(ROUND((1.0 - CAST(InpatientEncWPrincipalDiag AS FLOAT) / CAST(InpatientEnc AS FLOAT)) * 100.0, 2) as DECIMAL(5,2)) AS InpatientEncWOPrincipalDiagPcnt FROM ( SELECT COUNT(e.ENC_ID) AS InpatientEnc, SUM(IIF(d.ENC_ID IS NOT NULL, 1, 0)) AS InpatientEncWPrincipalDiag FROM ", encounters ," e LEFT JOIN ( SELECT DISTINCT ENC_ID FROM ", diagnoses, " d WHERE d.PRINCIPAL_DX = 'P' AND d.DX_ORIGIN != 'PR' ) d ON d.ENC_ID = e.ENC_ID WHERE e.ENCTYPE IN('IP', 'EI') ) PxDiags;")) if (params$QAAlert == TRUE){ ipedei_no_principal_diag_table <- subset(ipedei_no_principal_diag, InpatientEncWOPrincipalDiagPcnt > 10.0) if (!rlang::is_empty(ipedei_no_principal_diag_table) & nrow(ipedei_no_principal_diag_table) > 0){ QA_Alert_Message_306 <- paste0("More than 10% of IP (inpatient) encounters with any diagnosis don't have a principal diagnosis") QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("3.06",QA_Alert_Message_306) } }
benchmark_start <- run_query(paste0(" SET NOCOUNT ON; DECLARE @BenchmarkStartDate DATE; SET @BenchmarkStartDate = ( SELECT IIF(MAX(x.ADATE) <= GETDATE(), MAX(x.ADATE), GETDATE()) FROM ", encounters, " x ); SELECT @BenchmarkStartDate; ")) data_result_75_complete <- run_query(str_replace_all(paste0(" SET NOCOUNT ON; DECLARE @BenchmarkPriorYearAvg INT; SET @BenchmarkPriorYearAvg = ( SELECT COUNT(1) / 12 FROM ", encounters, " e WHERE e.ENCTYPE IN('IP', 'AV', 'ED', 'EI') AND e.ADATE >= DATEADD(MONTH, -24, @BenchmarkStartDate) AND e.ADATE <= DATEADD(MONTH, -12, @BenchmarkStartDate) ); SELECT FORMAT(ADATE, 'MM-MMM') Month, @BenchmarkPriorYearAvg AS BenchmarkCount, COUNT(1) PriorMonthCount, CAST(ROUND((CAST(COUNT(1) AS FLOAT) / @BenchmarkPriorYearAvg * 100.0), 2) as DECIMAL(5,2)) AS PercentofBenchMark FROM ", encounters, " e WHERE e.ENCTYPE IN('IP', 'AV', 'ED', 'EI') AND e.ADATE >= DATEADD(MONTH, -12, @BenchmarkStartDate) GROUP BY FORMAT(ADATE, 'MM-MMM') ORDER BY FORMAT(ADATE, 'MM-MMM'); "), "@BenchmarkStartDate", paste0('\'', benchmark_start[1,1], '\'', sep=''))) data_result_75_complete$PercentofBenchMark <- as.numeric(as.character(data_result_75_complete$PercentofBenchMark))
knitr::kable(QA_Alert_Messages, row.names = FALSE)
knitr::kable(missing_columnsTables, row.names = FALSE, col.names = c("Table Name", "Column Name"))
knitr::kable(invalid_specs, row.names = FALSE, col.names = c("Table Name", "Column Name", "Expected Number Precision", "Found Number Precision", "Expected Number Scale", "Found Number Scale", "Expected Char Length", "Found Char Length", "Expected Date Precision", "Found Date Precision"))
knitr::kable(violated_primary_keys, row.names = FALSE)
knitr::kable(data_validation)
knitr::kable(nullableValues_Table, row.names = FALSE, col.names = c("Table Name", "Column Name", "Expected Field Allows-Null Value", "Found Field Allows -Null Value"))
knitr::kable(orphan_person_ids_table, row.names = FALSE, col.names = c("Target Table", "Target Column", "Reference Table", "Reference Column", "Count of Values Not Found", "Distinct Total in Target Table", "Percentage of Missing to Distinct Total"))
knitr::kable(orphan_enc_ids_table, row.names = FALSE, col.names = c("Target Table", "Target Column", "Reference Table", "Reference Column", "Count of Values Not Found", "Distinct Total in Target Table", "Percentage of Missing to Distinct Total"))
knitr::kable(subset(repErrors_tx, select=c(tablename, nrows, field, NP_bad)), col.names = c('Table','N','Field','Errors N(%)'))
knitr::kable(orphan_provider_ids_table, row.names = FALSE, col.names = c("Target Table", "Target Column", "Reference Table", "Reference Column", "Count of Values Not Found", "Distinct Total in Target Table", "Percentage of Missing to Distinct Total"))
cat("Top 50 Invalid Codes") knitr::kable(top_50_invalid, row.names = FALSE, col.names = c("Table Name", "Code Type", "Code", "Valid Result", "Count Invalid"))
futureDataMsg <- if(nrow(subset(fdatesdone, pctFuture>=5))>0 & params$QAAlert==F){ knitr::kable(subset(fdatesdone, pctFuture>=5), col.names = c('Table','Date name','Total rows','N future dates','%')) } else if(nrow(subset(fdatesdone, pctFuture>=5))==0 & params$QAAlert==F) { "No table exceeds the 5% limit on future dates." } else if ( params$QAAlert==T) { knitr::kable(fdatesdone, col.names = c('Table','Date name','Total rows','N future dates','%')) } futureDataMsg
outOfRangeMsg <- NULL outOfRangeMsg <- if(nrow(subset(allOutOfRange, (N_low+N_high)/nrows>=0.1))>0 & params$QAAlert==F){ knitr::kable( subset(subset(allOutOfRange, (N_low+N_high)/nrows>=0.1), select=c(table,item, low, high, nrows, NP_low, NP_high)), col.names = c('Table','Field','Check Low','High','N','Low values N (%)','High values N (%)') ) } else if(nrow(subset(allOutOfRange, (N_low+N_high)/nrows>=0.1))==0 & params$QAAlert==F) { "No table exceeds the 5% limit." } else if ( params$QAAlert==T) { knitr::kable( subset(allOutOfRange, select=c(table,item, low, high, nrows, NP_low, NP_high)), col.names = c('Table','Field','Check Low','High','N','Low values N (%)','High values N (%)') ) } outOfRangeMsg
illogicalDatesMsg <- NULL illogicalDatesMsg <- if(nrow(subset(table2.03_tx, pct>=5))>0 & params$QAAlert==F){ knitr::kable(subset(table2.03_tx, pct>=5)[,c('ord','comp','n','pct', 'srctab')], col.names = c(' ','Date comparison','N Patients','% of unique patients in the encounter table','Source table')) } else if(nrow(subset(fdatesdone, pctFuture>=5))==0 & params$QAAlert==F) { "No table exceeds the 5% limit on illogical dates." } else if ( params$QAAlert==T) { knitr::kable(table2.03_tx[,c('ord','comp','n','pct', 'srctab')], col.names = c(' ','Date comparison','N Patients','% of unique patients in the encounter table','Source table')) }
check3.03Msg <- NULL check3.03Msg <- if(nrow(subset(result3.03, pctBad>=10))>0 & params$QAAlert==F){ knitr::kable(subset(result3.03, pctBad>=10)[,c('dataTable','variable','N_rows','N_bad', 'pctBad')], col.names = c('Table','Variable','N Rows','N with bad values','%')) } else if(nrow(subset(fdatesdone, pctFuture>=5))==0 & params$QAAlert==F) { "No check exceeds the 10% limit on bad values." } else if ( params$QAAlert==T) { knitr::kable(result3.03[,c('dataTable','variable','N_rows','N_bad', 'pctBad')], col.names = c('Table','Variable','N Rows','N with bad values','%'))}
knitr::kable(ipedei_no_principal_diag, row.names = FALSE, col.names = c("In-Patient Type Encounters", "In-Patient Encounters W/O Principal Diagnoses", "% of In-Patient Type Encounters W/O Principal Diagnoses"))
cat(paste0('Benchmark Start Date: ', benchmark_start[1,1])) if (params$QAAlert == TRUE){ data_result_75_complete_table <- subset(data_result_75_complete, PercentofBenchMark < 75 ) if (!rlang::is_empty(data_result_75_complete_table) & nrow(data_result_75_complete_table) > 0){ QA_Alert_Message_307 <- paste0("Encounters, diagnoses, or procedures in an ambulatory (AV), emergency department (ED), or inpatient (IP) setting are less than 75% complete three months prior to the current month") QA_Alert_Messages[nrow(QA_Alert_Messages) + 1,] = c("3.07",QA_Alert_Message_307) } } knitr::kable(data_result_75_complete,row.names = FALSE, col.names = c("Month", "Benchnmark Count", "Prior Month Count", "% of Benchmark"))
# close odbc odbcCloseAll() endTime <- Sys.time() runtime <- endTime - startTime
Query run time = r runtime
minutes
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.